// file: 	cs-mrp.do
// purpose: Generate MRP estimates and create Fig. 4 and Table 1.	




* Start with Pew public opinion data
use cs-recode.dta, clear

*drop DC
drop if fips==11

*missing data recoded to midpoint so that all obs are included in estimate,
*recommended when using MRP given poststratification. See Katellac et al working paper

recode opst (.=2.5), gen(opst_mrp)


// Recode demographic variables into categories that match Census data
recode r_age (18/29=1)(30/44=2)(45/64=3)(65/97=4), gen(age4cat)
recode r_educ 	(1 2 =1 "Less than HS") ///
				(3=2 "HS")(4 5=3 "Some college") ///
				(6 7 = 4 "BA or more") ///
				, gen(educ4cat)
recode r_race (4 5 = 4)				

drop if age4cat==. | educ4cat==. | r_male==. | r_race==. | fips==. | opst_mrp==.a

egen sex_race = concat(r_male r_race)
egen age_educ = concat(age4cat educ4cat)


* I use linear estimate rather than dichotomous to 
* 1) avoid dichotomizing 4-point score
* 2) linear estimand is much easier computationally. Dichotomous
*	version does not converge. 

foreach yr in 1997 2001 2002 2005 2008 2009 2010 2011 2012 {

* These are the model estimates used for generating MRP estimates in the paper,
* with no pooling by year
mixed opst_mrp reppid_est pct_rural percapincreal debt_net_pc_real if year==`yr' &r_race~=.||_all:R.age4cat ||_all:R.educ4cat ||_all:R.age_educ ||_all:R.sex_race ||fips:
estimates save mrp_`yr', replace
predict reff_`yr'_*, reffects
rename reff_`yr'_1 reff_`yr'_age4cat
rename reff_`yr'_2 reff_`yr'_educ4cat
rename reff_`yr'_3 reff_`yr'_age_educ
rename reff_`yr'_4 reff_`yr'_sex_race
rename reff_`yr'_5 reff_`yr'_fips
}

* This code pools public opinion estimates over time, with year effects generating using 
* random intercepts.

mixed opst_mrp reppid_est pct_rural percapincreal debt_net_pc_real if r_race~=.||_all:R.age4cat ||_all:R.educ4cat ||_all:R.age_educ ||_all:R.sex_race ||_all:R.year ||fips:
estimates save mrp_all, replace
predict reff_all_*, reffects
rename reff_all_1 reff_all_age4cat
rename reff_all_2 reff_all_educ4cat
rename reff_all_3 reff_all_age_educ
rename reff_all_4 reff_all_sex_race
rename reff_all_5 reff_all_year
rename reff_all_6 reff_all_fips


foreach var in age4cat educ4cat age_educ sex_race fips {
preserve 
gen reff_`var'=.
foreach yr in 1997 2001 2002 2005 2008 2009 2010 2011 2012 {
replace reff_`var' = reff_`yr'_`var' if year==`yr'
}
collapse reff_`var' (sd) sd_`var'=reff_`var', by(`var' year)
save re_`var'.dta, replace
restore
}

foreach var in age4cat educ4cat age_educ sex_race year fips {
preserve 
collapse reff_all_`var' (sd) sd_all_`var'=reff_all_`var', by(`var')
save re_all_`var'.dta, replace
restore
}


clear
save mrp_est.dta, emptyok replace

use stateidentifiers.dta
drop if fips==11
expand 4
bysort fips: gen age4cat = _n
expand 4
bysort fips age4cat: gen educ4cat = _n
egen age_educ =concat(age4cat educ4cat)
expand 2
bysort fips age4cat educ4cat: gen r_male = _n-1
expand 4 
bysort fips age4cat educ4cat r_male: gen r_race = _n
egen sex_race = concat(r_male r_race)
expand 9
bysort fips age4cat educ4cat r_male sex_race: gen year=_n
recode year (1=1997)(2=2001)(3=2002)(4=2005)(5=2008)(6=2009) ///
	(7=2010)(8=2011)(9=2012)

merge m:1 fips year using stmerge.dta
		assert _merge~=1
		drop if _merge==2


foreach var in age4cat educ4cat age_educ sex_race fips {
merge m:1 year `var' using re_`var'.dta, gen(mg_`var')
}

foreach var in age4cat educ4cat age_educ sex_race year fips {
merge m:1 `var' using re_all_`var'.dta, gen(mg_all_`var')
}

*normally we would set fips random effects to 0 in missing states
* but given focus on p.o. in small states, I don't 
* want estimates driven by MRP smoothing. 

tab fips if mg_fips==2
tab fips if reff_fips==0
drop if mg_fips==2

gen yhat=.
gen yhat_all=.
foreach yr in 1997 2001 2002 2005 2008 2009 2010 2011 2012 {
estimates use mrp_`yr'.ster

replace yhat = _b[_cons] + _b[reppid_est]*reppid_est +  _b[pct_rural]*pct_rural ///
	+  _b[percapincreal]*percapincreal + _b[debt_net_pc_real]*debt_net_pc_real + ///
	reff_age4cat + reff_educ4cat + reff_age_educ  ///
	+ reff_sex_race + reff_fips if year==`yr'

}

estimates use mrp_all.ster
replace yhat_all = _b[_cons] + _b[reppid_est]*reppid_est + _b[pct_rural]*pct_rural  ///
	+  _b[percapincreal]*percapincreal + _b[debt_net_pc_real]*debt_net_pc_real + ///
	reff_all_age4cat + reff_all_educ4cat + reff_all_age_educ  ///
	+ reff_all_sex_race + reff_all_year + reff_all_fips


foreach var of varlist age4cat educ4cat region fips {
tostring `var', gen(str`var')
}

gen str cellid = strage4cat + "_" + streduc4cat + "_" + sex_race + "_" + strfips 

save mrp_est.dta, replace	

// Load counts by state-year-demographic category from ACS microdata 
use stcounts.dta, clear 

merge 1:1 cellid year using mrp_est.dta, gen (mgcellid)
* for missing cells from ACS, code as 1 observation
replace count=1 if mgcellid==2

egen sttotal = total(count), by(year fips)
gen prop_cellid = count/sttotal
gen wgt_yhat = prop_cellid*yhat
gen wgt_yhat_all = prop_cellid*yhat_all
collapse (sum) wgt_yhat wgt_yhat_all, by(year fips)

merge 1:1 fips year using stmerge.dta

merge m:1 fips using stateidentifiers.dta, nogen
*This graph can also be subsetted by region, showing negative
* relationship across all regions except the South

xtset fips year
replace wgt_yhat=. if wgt_yhat==0
replace mds1=l1.mds1 if mds1==.
replace mds2_abs=l1.mds2_abs if mds2_abs==.
replace mds1=f1.mds1 if mds1==.
replace mds2_abs=f1.mds2_abs if mds2_abs==.
replace mds1=l2.mds1 if mds1==.
replace mds2_abs=l2.mds2_abs if mds2_abs==.


* Analysis

* Fig 4
twoway ///
	scatter wgt_yhat lncs, msymbol(none) mlabel(stateabv) mlabsize(*.4) ///
		mlabposition(0) mlabcolor(black)  || ///
	lpolyci wgt_yhat lncs, acolor(gs5%20) clcolor(red%75) alwidth(none) ///
		subtitle(, size(small) nobox nobexpand ) ///
		by(year, note("") rows(3) legend(off) graphregion(color(white)) ///
			plotregion(color(white) lcolor(white) style(none))) ///
		plotregion(color(white) lcolor(white) style(none)) ///
		ysize(3) xsize(3) legend(off) ylabel(1.75(.5)3.3, angle(0) labsize(vsmall) ///
		glcolor(gs8) glwidth(vthin)) ///
		xtitle("Constituency Size (log)", size(small)) ///
		xlab(-1(1)4, labsize(vsmall))

graph export scatter-mrp-year.pdf, replace

gen unified = abs(unified_gov) 
recode unified_gov (1=1)(-1 0=0), gen(unified_dem)
recode unified_gov (-1=1)(1 0=0), gen(unified_gop)

recode year (1997=1)(2001=2)(2002=3)(2005=4)(2008=5)(2009=6)(2010=7)(2011=8)(2012=9), gen(batch)
replace batch=. if batch>9


label var unemployment "Unemployment Rate"
label var lncs "Constituency Size (log)"
label var mds1 "Legislative Professionalism - 1st Dim"
label var mds2_abs "Legislative Professionalism - 2nd Dim"
label var policy "Policy Liberalism"


* This set of models uses the previous survey year for all lags,
* 	even if the previous survey was not in year - 1. Keeps more 
*	observations in the dataset but reduces the predicted power 
*	of lagged DV. These anlayses are not reported in the paper.
xtset fips batch
xtpcse wgt_yhat lncs  i.batch , 
est store mrp1
xtpcse wgt_yhat lncs  unemployment mds1 mds2_abs policy i.batch , 
est store mrp2
xtpcse wgt_yhat lncs  unemployment mds1 mds2_abs policy l1.wgt_yhat i.batch , 
est store mrp3
xtpcse wgt_yhat lncs  s1.lncs unemployment mds1 mds2_abs policy l1.wgt_yhat i.batch , 
est store mrp4
xtpcse wgt_yhat lncs  unemployment mds1 mds2_abs policy l1.wgt_yhat i.batch i.fips , 
est store mrp5

	
* This set of models uses year as the time variable. Lagged models result
*	in lost observations but a closer relationship between the lagged DV
*	and the MRP estimate. These models are reported in Table 1. 	
xtset fips year
xtpcse wgt_yhat lncs  i.batch , 
est store mrp1
xtpcse wgt_yhat lncs  unemployment mds1 mds2_abs policy i.batch , 
est store mrp2
xtpcse wgt_yhat lncs  unemployment mds1 mds2_abs policy l1.wgt_yhat i.batch , 
est store mrp3
xtpcse wgt_yhat lncs  s1.lncs unemployment mds1 mds2_abs policy l1.wgt_yhat i.batch , 
est store mrp4
xtpcse wgt_yhat lncs  unemployment mds1 mds2_abs policy l1.wgt_yhat i.batch i.fips , 
est store mrp5
	
esttab mrp1 mrp2 mrp3 mrp4 using table1.txt, cells("b(fmt(3) star)" "se(fmt(3) par)") ///
	starlevels(+ .1 * .05 ** .01) tex stats(N r2) style(tex) ///
	label nolz drop(*batch ) ///
	order(lncs s1.lncs unemployment mds1 mds2_abs policy l1.wgt_yhat ) replace	

* For description of effect in paper using Model 3	
est restore mrp3
sum lncs if e(sample)
local fd = (r(max) - r(min))*_b[lncs]
sum wgt_yhat if e(sample)
di `fd'/(r(max) - r(min))	
	

* for footnote: 
xtset fips batch
sum s8.cshouse if year==2012
